#!/bin/sh
# ctdb event script for monitoring local filesystem and memory utilization

# Default CTDB_BASE to the parent of the directory containing this
# script (as a physical path) when the caller has not provided it
if [ -z "$CTDB_BASE" ]; then
	CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD")
fi

. "${CTDB_BASE}/functions"

load_script_options

# Sets script_state_dir, which is where this script keeps its cache files
ctdb_setup_state_dir "service" "system-monitoring"

# Succeed if $1 is an integer percentage in the range 0-100
#
# $1 - candidate percentage
# $2 - name of the check, only used in the warning message
#
# An empty $1 fails silently.  Any other invalid value fails with a
# warning on stdout.
validate_percentage()
{
	# Nothing configured: a failure that doesn't need a warning
	[ -n "$1" ] || return 1

	case "$1" in
	100 | [0-9][0-9] | [0-9]) return 0 ;;
	esac

	echo "WARNING: ${1} is an invalid percentage in \"${2}\" check"
	return 1
}

# Compare a utilization figure against warning/unhealthy thresholds
#
# Arguments:
#   $1 - description of the thing being checked; used in messages and
#        to derive the name of the cache file
#   $2 - thresholds in the form <warn>[:<unhealthy>]
#   $3 - current utilization as an integer percentage
#   $4 - optional command, run via eval when utilization reaches the
#        unhealthy threshold and the cached previous value was below it
#
# Globals: script_state_dir (read), location of the cache file
#
# If the unhealthy threshold is reached then an ERROR is printed, the
# utilization is cached and the calling shell exits with status 1.
# Otherwise, a WARNING is printed (and the utilization cached) when
# utilization is at or above the warning threshold and differs from
# the cached value, and a NOTICE is printed (and the cache removed)
# when utilization is below the warning threshold but a cache file
# still exists.
check_thresholds()
{
	_thing="$1"
	_thresholds="$2"
	_usage="$3"
	_unhealthy_callout="$4"

	case "$_thresholds" in
	*:*)
		_warn_threshold="${_thresholds%:*}"
		_unhealthy_threshold="${_thresholds#*:}"
		;;
	*)
		_warn_threshold="$_thresholds"
		_unhealthy_threshold=""
		;;
	esac

	_t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g')
	# script_state_dir set by ctdb_setup_state_dir()
	# shellcheck disable=SC2154
	_cache="${script_state_dir}/cache_${_t}"
	_prev=0
	if [ -r "$_cache" ]; then
		read -r _prev <"$_cache"
		# An empty or corrupt cache file (e.g. left behind by
		# a failed write when the filesystem is full) must not
		# break the numeric comparison below, since that would
		# silently skip the unhealthy callout.  Treat it as
		# "no previous usage".
		case "$_prev" in
		"" | *[!0-9]*) _prev=0 ;;
		esac
	fi
	if validate_percentage "$_unhealthy_threshold" "$_thing"; then
		if [ "$_usage" -ge "$_unhealthy_threshold" ]; then
			printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \
			       "$_thing" \
			       "$_usage" \
			       "$_unhealthy_threshold"
			# Only run unhealthy callout if passing the
			# unhealthy threshold.  That is, if the
			# previous usage was below the threshold.
			if [ "$_prev" -lt "$_unhealthy_threshold" ]; then
				eval "$_unhealthy_callout"
			fi
			echo "$_usage" >"$_cache"
			exit 1
		fi
	fi

	if validate_percentage "$_warn_threshold" "$_thing"; then
		if [ "$_usage" -ge "$_warn_threshold" ]; then
			# Don't repeat the warning if nothing has changed
			if [ "$_usage" = "$_prev" ]; then
				return
			fi
			printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \
			       "$_thing" \
			       "$_usage" \
			       "$_warn_threshold"
			echo "$_usage" >"$_cache"
		else
			# No cache file means no warning was
			# outstanding, so there is nothing to report
			if [ ! -r "$_cache" ]; then
				return
			fi
			printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \
			       "$_thing" \
			       "$_usage" \
			       "$_warn_threshold"
			rm -f "$_cache"
		fi
	fi
}

# Set CTDB_MONITOR_FILESYSTEM_USAGE to a default list of
# <mount_point>:90 entries, one for each distinct filesystem that
# holds a database directory
#
# The list is generated once and stored in a file in script_state_dir.
# Whenever that file is readable it is used as is.
set_monitor_filsystem_usage_defaults()
{
	_defaults_file="${script_state_dir}/cache_filsystem_usage_defaults"

	[ -r "$_defaults_file" ] || {
		# For each database directory print the mount point
		# (6th column of the df data line) with a 90% warning
		# threshold.  Duplicates are dropped and xargs joins
		# everything onto a single line so that the read
		# below picks up all of the entries.
		for _dbdir in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \
			"${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \
			"${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do
			df -kP "$_dbdir" |
				awk 'NR == 2 { printf "%s:90\n", $6 }'
		done | sort -u | xargs >"$_defaults_file"
	}

	read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_defaults_file"
}

# Check utilization of every filesystem listed in
# CTDB_MONITOR_FILESYSTEM_USAGE
#
# The variable is a whitespace-separated list of entries in the format
# <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold].  If it is
# empty then set_monitor_filsystem_usage_defaults() is called to fill
# it in.
#
# Entries whose mount point is not a directory, or for which df
# reports no utilization figure, produce a warning and are skipped.
monitor_filesystem_usage()
{
	[ -n "$CTDB_MONITOR_FILESYSTEM_USAGE" ] ||
		set_monitor_filsystem_usage_defaults

	# Word splitting of the list is intentional
	for _entry in $CTDB_MONITOR_FILESYSTEM_USAGE; do
		# Everything before the first ':' is the mount point,
		# everything after it is the threshold specification
		_mnt="${_entry%%:*}"
		_limits="${_entry#*:}"

		if [ -d "$_mnt" ]; then
			# Current utilization: the number in front of
			# the '%' in the df output
			_pct=$(df -kP "$_mnt" |
				sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')
			if [ -n "$_pct" ]; then
				check_thresholds "Filesystem ${_mnt}" \
					"$_limits" \
					"$_pct"
			else
				printf 'WARNING: Unable to get FS utilization for %s\n' \
				       "$_mnt"
			fi
		else
			echo "WARNING: Directory ${_mnt} does not exist"
		fi
	done
}

# Emit diagnostics about memory usage: the "meminfo" data via
# get_proc, a full process listing from ps, and then "m" is written to
# "sysrq-trigger" via set_proc (presumably the corresponding /proc
# files - get_proc and set_proc are defined outside this script).
#
# Not called directly: monitor_memory_usage() passes this function's
# name to check_thresholds() as the unhealthy callout, where it is run
# via eval.
# shellcheck disable=SC2317
dump_memory_info()
{
	get_proc "meminfo"
	ps auxfww
	set_proc "sysrq-trigger" "m"
}

# Check combined memory + swap utilization against the thresholds in
# CTDB_MONITOR_MEMORY_USAGE, which defaults to 80 when empty
#
# Utilization is the integer percentage of MemTotal + SwapTotal that
# is in use.  MemAvailable is taken as the free memory when it is
# non-zero, otherwise MemFree + Cached + Buffers is used.  If both
# totals are zero then utilization is reported as 0.
#
# dump_memory_info is passed to check_thresholds() as the unhealthy
# callout.
monitor_memory_usage()
{
	: "${CTDB_MONITOR_MEMORY_USAGE:=80}"

	_meminfo_text=$(get_proc "meminfo")
	_mem_usage=$(printf '%s\n' "$_meminfo_text" | awk '
# Keep both a running sum and the most recent value for every key
{ sum[$1] += $2 ; last[$1] = $2 }
END {
    avail = sum["MemAvailable:"] + 0
    if (avail == 0) {
	avail = sum["MemFree:"] + sum["Cached:"] + sum["Buffers:"]
    }
    capacity = last["MemTotal:"] + last["SwapTotal:"]
    if (capacity == 0) {
	print 0
    } else {
	used = last["MemTotal:"] - avail + last["SwapTotal:"] - last["SwapFree:"]
	print int(used / capacity * 100)
    }
}')

	check_thresholds "System memory" \
		"$CTDB_MONITOR_MEMORY_USAGE" \
		"$_mem_usage" \
		dump_memory_info
}

# Only the "monitor" event does any work.  All other events succeed
# without doing anything.
if [ "$1" = "monitor" ]; then
	# Load/cache database options from configuration file
	ctdb_get_db_options

	# A non-zero status from either check becomes the exit status
	status=0
	monitor_filesystem_usage || status=$?
	monitor_memory_usage || status=$?
	exit "$status"
fi

exit 0
